In [1]:
import numpy as np 
import pandas as pd 
import pandas_profiling as pp
import altair as alt
from altair import Chart, X, Y, Axis, SortField
from vega_datasets import data
from altair.expr import datum
# for the notebook only (not for JupyterLab) run this command once per session
alt.renderers.enable('notebook')
# Select the 'default' data transformer with max_rows=None.
# NOTE(review): per the Altair docs, max_rows=None switches off the row-limit
# check, so the full DataFrame is embedded in every chart spec — confirm the
# resulting notebook size is acceptable.
# This is the last expression of the cell, so its return value is the cell output.
alt.data_transformers.enable('default', max_rows=None) 
Out[1]:
DataTransformerRegistry.enable('default')
In [2]:
# Same call, with the same arguments, as the last line of the first cell;
# repeating it here is redundant and only re-displays the registry repr.
alt.data_transformers.enable('default', max_rows=None) 
Out[2]:
DataTransformerRegistry.enable('default')
In [3]:
# Read the four CSV files from the working directory, in the order listed,
# and bind each resulting DataFrame to its own name.
df_winter, df_summer, df_sumandwint, df_dictionary = map(
    pd.read_csv,
    ('winter.csv', 'summer.csv', 'sumandwint.csv', 'dictionary.csv'),
)
In [4]:
# Preview the first rows of the summer dataset (head() with its default row count).
df_summer.head()
Out[4]:
Year City Sport Discipline Athlete Country Gender Event Medal
0 1896 Athens Aquatics Swimming HAJOS, Alfred HUN Men 100M Freestyle Gold
1 1896 Athens Aquatics Swimming HERSCHMANN, Otto AUT Men 100M Freestyle Silver
2 1896 Athens Aquatics Swimming DRIVAS, Dimitrios GRE Men 100M Freestyle For Sailors Bronze
3 1896 Athens Aquatics Swimming MALOKINIS, Ioannis GRE Men 100M Freestyle For Sailors Gold
4 1896 Athens Aquatics Swimming CHASAPIS, Spiridon GRE Men 100M Freestyle For Sailors Silver
In [6]:
# Interactive layered chart of record counts per year, one line per Gender.
# Hovering snaps to the nearest Year and reveals a vertical rule, a point on
# each line and the count as a text label (Altair "multi-line tooltip" pattern).
#
# NOTE(review): alt.selection(type='single') and .add_selection() are the
# pre-Altair-5 spelling; they are kept because this notebook enables the
# 'notebook' renderer, which suggests an older Altair release — confirm before
# migrating to selection_point() / add_params().

# Base layer: smoothed line of the count(Gender) aggregate over Year, coloured by Gender.
line = alt.Chart(df_sumandwint).mark_line(interpolate='basis').encode(
    x=alt.X('Year:Q', scale=alt.Scale(domain=[1895, 2012])),
    y='count(Gender)',
    color=alt.Color('Gender')
)

# Single-value selection that follows the mouse and snaps to the nearest Year;
# empty='none' means nothing counts as selected until the pointer is over the chart.
nearest = alt.selection(type='single', nearest=True, on='mouseover',
                        fields=['Year'], empty='none')

# Vertical gray rule drawn only at the currently selected Year.
# The type is spelled out (:Q) so it matches the `line` layer's shared x scale.
rules = alt.Chart(df_sumandwint).mark_rule(color='gray').encode(
    x='Year:Q',
).transform_filter(
    nearest
)

# Text label next to each line showing the count at the selected Year;
# a single blank is rendered everywhere else.
text = line.mark_text(align='left', dx=5, dy=-5).encode(
    text=alt.condition(nearest, 'count(Gender)', alt.value(' '))
)

# Fully transparent points spanning the x axis; they exist only to carry the
# selection so that hovering anywhere picks up a Year.
selectors = alt.Chart(df_sumandwint).mark_point().encode(
    x='Year:Q',
    opacity=alt.value(0),
).add_selection(
    nearest
)

# Points on the lines, visible only at the selected Year.
points = line.mark_point().encode(
    opacity=alt.condition(nearest, alt.value(1), alt.value(0))
)

# Stack the layers and apply the shared theme (background, axes, legend, title).
# The layered chart is the last expression of the cell, so it is what gets displayed.
alt.layer(
    line, selectors, rules, text, points
).configure(
    background='#f3f7f7'
).configure_axis(
    labelFont='Lato Regular', labelAngle=0, labelColor='#5d646f', domain=False, labelFontSize=12,
    titleFont='Lato Regular Italic', titleFontSize=14, grid=True, gridColor='#5d646f', gridDash=[0.5, 0.5, 0.5], gridOpacity=0.4,
    tickColor='#5d646f', tickDash=[0.5, 0.5, 0.5], tickOpacity=0.4
).configure_view(
    stroke='#f3f7f7'
).configure_legend(
    strokeColor='gray',
    fillColor='#f3f7f7',
    padding=10,
    cornerRadius=10
).properties(
    width=700, height=330, padding=30,
    title=alt.TitleParams(text='Gender Participation in the Olympic games', anchor='start',
                          font='Lato Bold', fontSize=18, color='#3E454F')
)
Out[6]: